{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['', '', 'U.S. Census Bureau']\n",
      "['Jan-1992', 29811.0, '']\n",
      "['Jan-2004', 61146.0, '']\n",
      "['Jul-2017', 104155.0, '']\n",
      "104155.0\n",
      "[61146.0, 65230.0, 78662.0, 73252.0, 77491.0, 75355.0, 78837.0, 76377.0, 73580.0, 68745.0, 65221.0, 72476.0, 61936.0, 66945.0, 80609.0, 77669.0, 78327.0, 85378.0, 87710.0, 81220.0, 69412.0, 63594.0, 64845.0, 70662.0, 65737.0, 67452.0, 82754.0, 76180.0, 81517.0, 80599.0, 80889.0, 82997.0, 73000.0, 70365.0, 67512.0, 70995.0, 66585.0, 69373.0, 83833.0, 77265.0, 85183.0, 79824.0, 79097.0, 84633.0, 73470.0, 74461.0, 67344.0, 69071.0, 65818.0, 69030.0, 76625.0, 73759.0, 76177.0, 69437.0, 69673.0, 69674.0, 60493.0, 55015.0, 47840.0, 52324.0, 50083.0, 49992.0, 57146.0, 55945.0, 57768.0, 59556.0, 62323.0, 66250.0, 52143.0, 54849.0, 49983.0, 55734.0, 49827.0, 51833.0, 67517.0, 64426.0, 65287.0, 64707.0, 67027.0, 66058.0, 61650.0, 61349.0, 58778.0, 64454.0, 57886.0, 62633.0, 75255.0, 69622.0, 69355.0, 70251.0, 69214.0, 71332.0, 66905.0, 65850.0, 63851.0, 70784.0, 62120.0, 70401.0, 81275.0, 73561.0, 79851.0, 75976.0, 75234.0, 80725.0, 71299.0, 72257.0, 69650.0, 74145.0, 69283.0, 73008.0, 83626.0, 81492.0, 86712.0, 82587.0, 86033.0, 88684.0, 75505.0, 78797.0, 75377.0, 78190.0, 70375.0, 74939.0, 90507.0, 87984.0, 93250.0, 86710.0, 91278.0, 92619.0, 83350.0, 84325.0, 78958.0, 86556.0, 78469.0, 79596.0, 96542.0, 93763.0, 97597.0, 94779.0, 97889.0, 98014.0, 90906.0, 90432.0, 84106.0, 93319.0, 80013.0, 89217.0, 100052.0, 96610.0, 97804.0, 97437.0, 98760.0, 103208.0, 94771.0, 92180.0, 89676.0, 100414.0, 84658.0, 89010.0, 105946.0, 97175.0, 105685.0, 102951.0, 104155.0]\n"
     ]
    }
   ],
   "source": [
    "# data come from\n",
    "# https://www.census.gov/econ/currentdata/dbsearch?program=MARTS&startYear=1992&endYear=2017&categories%5B%5D=441&dataType=SM&geoLevel=US&notAdjusted=1&submit=GET+DATA&releaseScheduleId\n",
    "# 441: Motor Vehicle and Parts Dealers: U.S. Total — Not Seasonally Adjusted Sales - Monthly [Millions of Dollars]\n",
    "import xlrd\n",
    "book = xlrd.open_workbook('SeriesReport-201708162020.xls') # this file is in the same folder as this notebook\n",
    "first_sheet = book.sheet_by_index(0) # get the first sheet of the workbook\n",
    "print(first_sheet.row_values(0)) \n",
    "print(first_sheet.row_values(8))\n",
    "print(first_sheet.row_values(152))\n",
    "print(first_sheet.row_values(314))\n",
    "print(first_sheet.row_values(314)[1]) \n",
    "\n",
    "# creating the sales variable - we only need data from January 2004 since data on google search only starts then\n",
    "sales=[]\n",
    "for i in range(152,315):\n",
    "    sales=sales+[first_sheet.row_values(i)[1]]\n",
    "print(sales)\n",
    "# note this goes to July 2017!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['Category: Trucks & SUVs'], [], ['Month', 'Geo: United States'], ['2004-01', '95'], ['2004-02', '93'], ['2004-03', '94'], ['2004-04', '95'], ['2004-05', '93'], ['2004-06', '94'], ['2004-07', '99'], ['2004-08', '100'], ['2004-09', '97'], ['2004-10', '86'], ['2004-11', '81'], ['2004-12', '85'], ['2005-01', '89'], ['2005-02', '91'], ['2005-03', '89'], ['2005-04', '83'], ['2005-05', '84'], ['2005-06', '87'], ['2005-07', '93'], ['2005-08', '93'], ['2005-09', '78'], ['2005-10', '76'], ['2005-11', '74'], ['2005-12', '76'], ['2006-01', '79'], ['2006-02', '79'], ['2006-03', '79'], ['2006-04', '81'], ['2006-05', '77'], ['2006-06', '82'], ['2006-07', '87'], ['2006-08', '87'], ['2006-09', '77'], ['2006-10', '74'], ['2006-11', '72'], ['2006-12', '73'], ['2007-01', '74'], ['2007-02', '78'], ['2007-03', '75'], ['2007-04', '76'], ['2007-05', '73'], ['2007-06', '73'], ['2007-07', '79'], ['2007-08', '83'], ['2007-09', '74'], ['2007-10', '70'], ['2007-11', '66'], ['2007-12', '68'], ['2008-01', '70'], ['2008-02', '71'], ['2008-03', '69'], ['2008-04', '67'], ['2008-05', '66'], ['2008-06', '63'], ['2008-07', '65'], ['2008-08', '65'], ['2008-09', '59'], ['2008-10', '58'], ['2008-11', '57'], ['2008-12', '60'], ['2009-01', '60'], ['2009-02', '58'], ['2009-03', '59'], ['2009-04', '58'], ['2009-05', '57'], ['2009-06', '57'], ['2009-07', '62'], ['2009-08', '65'], ['2009-09', '56'], ['2009-10', '55'], ['2009-11', '53'], ['2009-12', '56'], ['2010-01', '56'], ['2010-02', '59'], ['2010-03', '58'], ['2010-04', '56'], ['2010-05', '53'], ['2010-06', '54'], ['2010-07', '61'], ['2010-08', '61'], ['2010-09', '57'], ['2010-10', '54'], ['2010-11', '52'], ['2010-12', '55'], ['2011-01', '68'], ['2011-02', '69'], ['2011-03', '66'], ['2011-04', '66'], ['2011-05', '63'], ['2011-06', '65'], ['2011-07', '69'], ['2011-08', '69'], ['2011-09', '64'], ['2011-10', '63'], ['2011-11', '62'], ['2011-12', '64'], ['2012-01', '66'], ['2012-02', '67'], ['2012-03', '66'], ['2012-04', '67'], ['2012-05', '65'], ['2012-06', '66'], ['2012-07', '71'], ['2012-08', '73'], ['2012-09', '67'], ['2012-10', '63'], ['2012-11', '62'], ['2012-12', '66'], ['2013-01', '67'], ['2013-02', '69'], ['2013-03', '69'], ['2013-04', '67'], ['2013-05', '66'], ['2013-06', '66'], ['2013-07', '72'], ['2013-08', '72'], ['2013-09', '65'], ['2013-10', '62'], ['2013-11', '62'], ['2013-12', '63'], ['2014-01', '64'], ['2014-02', '67'], ['2014-03', '67'], ['2014-04', '67'], ['2014-05', '63'], ['2014-06', '63'], ['2014-07', '67'], ['2014-08', '66'], ['2014-09', '62'], ['2014-10', '61'], ['2014-11', '60'], ['2014-12', '59'], ['2015-01', '61'], ['2015-02', '66'], ['2015-03', '62'], ['2015-04', '61'], ['2015-05', '59'], ['2015-06', '61'], ['2015-07', '64'], ['2015-08', '64'], ['2015-09', '59'], ['2015-10', '57'], ['2015-11', '55'], ['2015-12', '57'], ['2016-01', '81'], ['2016-02', '87'], ['2016-03', '84'], ['2016-04', '82'], ['2016-05', '82'], ['2016-06', '85'], ['2016-07', '89'], ['2016-08', '89'], ['2016-09', '87'], ['2016-10', '83'], ['2016-11', '78'], ['2016-12', '85'], ['2017-01', '85'], ['2017-02', '88'], ['2017-03', '89'], ['2017-04', '86'], ['2017-05', '87'], ['2017-06', '93'], ['2017-07', '96'], ['2017-08', '99']]\n"
     ]
    }
   ],
   "source": [
    "# opening CSV files - data downloaded from google trends\n",
    "import csv\n",
    "with open(\"Trucks and SUVs 17 08 2017 .csv\", 'r') as f:\n",
    "  reader = csv.reader(f)\n",
    "  TrucksSUVs = list(reader)\n",
    "print( TrucksSUVs)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[95.0, 93.0, 94.0, 95.0, 93.0, 94.0, 99.0, 100.0, 97.0, 86.0, 81.0, 85.0, 89.0, 91.0, 89.0, 83.0, 84.0, 87.0, 93.0, 93.0, 78.0, 76.0, 74.0, 76.0, 79.0, 79.0, 79.0, 81.0, 77.0, 82.0, 87.0, 87.0, 77.0, 74.0, 72.0, 73.0, 74.0, 78.0, 75.0, 76.0, 73.0, 73.0, 79.0, 83.0, 74.0, 70.0, 66.0, 68.0, 70.0, 71.0, 69.0, 67.0, 66.0, 63.0, 65.0, 65.0, 59.0, 58.0, 57.0, 60.0, 60.0, 58.0, 59.0, 58.0, 57.0, 57.0, 62.0, 65.0, 56.0, 55.0, 53.0, 56.0, 56.0, 59.0, 58.0, 56.0, 53.0, 54.0, 61.0, 61.0, 57.0, 54.0, 52.0, 55.0, 68.0, 69.0, 66.0, 66.0, 63.0, 65.0, 69.0, 69.0, 64.0, 63.0, 62.0, 64.0, 66.0, 67.0, 66.0, 67.0, 65.0, 66.0, 71.0, 73.0, 67.0, 63.0, 62.0, 66.0, 67.0, 69.0, 69.0, 67.0, 66.0, 66.0, 72.0, 72.0, 65.0, 62.0, 62.0, 63.0, 64.0, 67.0, 67.0, 67.0, 63.0, 63.0, 67.0, 66.0, 62.0, 61.0, 60.0, 59.0, 61.0, 66.0, 62.0, 61.0, 59.0, 61.0, 64.0, 64.0, 59.0, 57.0, 55.0, 57.0, 81.0, 87.0, 84.0, 82.0, 82.0, 85.0, 89.0, 89.0, 87.0, 83.0, 78.0, 85.0, 85.0, 88.0, 89.0, 86.0, 87.0, 93.0, 96.0, 99.0]\n",
      "164\n",
      "163\n",
      "[95.0, 93.0, 94.0, 95.0, 93.0, 94.0, 99.0, 100.0, 97.0, 86.0, 81.0, 85.0, 89.0, 91.0, 89.0, 83.0, 84.0, 87.0, 93.0, 93.0, 78.0, 76.0, 74.0, 76.0, 79.0, 79.0, 79.0, 81.0, 77.0, 82.0, 87.0, 87.0, 77.0, 74.0, 72.0, 73.0, 74.0, 78.0, 75.0, 76.0, 73.0, 73.0, 79.0, 83.0, 74.0, 70.0, 66.0, 68.0, 70.0, 71.0, 69.0, 67.0, 66.0, 63.0, 65.0, 65.0, 59.0, 58.0, 57.0, 60.0, 60.0, 58.0, 59.0, 58.0, 57.0, 57.0, 62.0, 65.0, 56.0, 55.0, 53.0, 56.0, 56.0, 59.0, 58.0, 56.0, 53.0, 54.0, 61.0, 61.0, 57.0, 54.0, 52.0, 55.0, 68.0, 69.0, 66.0, 66.0, 63.0, 65.0, 69.0, 69.0, 64.0, 63.0, 62.0, 64.0, 66.0, 67.0, 66.0, 67.0, 65.0, 66.0, 71.0, 73.0, 67.0, 63.0, 62.0, 66.0, 67.0, 69.0, 69.0, 67.0, 66.0, 66.0, 72.0, 72.0, 65.0, 62.0, 62.0, 63.0, 64.0, 67.0, 67.0, 67.0, 63.0, 63.0, 67.0, 66.0, 62.0, 61.0, 60.0, 59.0, 61.0, 66.0, 62.0, 61.0, 59.0, 61.0, 64.0, 64.0, 59.0, 57.0, 55.0, 57.0, 81.0, 87.0, 84.0, 82.0, 82.0, 85.0, 89.0, 89.0, 87.0, 83.0, 78.0, 85.0, 85.0, 88.0, 89.0, 86.0, 87.0, 93.0, 96.0]\n"
     ]
    }
   ],
   "source": [
    "#creating the Trucks and SUVs variable - note that the numbers in the file are considered as text so we need to make them floats\n",
    "TS=[]\n",
    "for i in range(3,len(TrucksSUVs)): # the first three elements in TrucksSUVs are irrelevant\n",
    "    TS=TS+[float(TrucksSUVs[i][1])]\n",
    "print(TS) # note this includes august 2017!\n",
    "print(len(TS))\n",
    "TS=TS[0:len(TS)-1] # to get til July 2017\n",
    "print(len(TS))\n",
    "print(TS) # \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[['Category: Auto Insurance'], [], ['Month', 'Geo: United States'], ['2004-01', '90'], ['2004-02', '86'], ['2004-03', '84'], ['2004-04', '87'], ['2004-05', '86'], ['2004-06', '93'], ['2004-07', '94'], ['2004-08', '100'], ['2004-09', '87'], ['2004-10', '79'], ['2004-11', '73'], ['2004-12', '78'], ['2005-01', '85'], ['2005-02', '83'], ['2005-03', '82'], ['2005-04', '77'], ['2005-05', '81'], ['2005-06', '86'], ['2005-07', '85'], ['2005-08', '90'], ['2005-09', '83'], ['2005-10', '73'], ['2005-11', '69'], ['2005-12', '70'], ['2006-01', '79'], ['2006-02', '74'], ['2006-03', '75'], ['2006-04', '76'], ['2006-05', '77'], ['2006-06', '80'], ['2006-07', '84'], ['2006-08', '85'], ['2006-09', '74'], ['2006-10', '71'], ['2006-11', '65'], ['2006-12', '66'], ['2007-01', '74'], ['2007-02', '72'], ['2007-03', '72'], ['2007-04', '72'], ['2007-05', '70'], ['2007-06', '70'], ['2007-07', '73'], ['2007-08', '72'], ['2007-09', '61'], ['2007-10', '63'], ['2007-11', '55'], ['2007-12', '54'], ['2008-01', '62'], ['2008-02', '59'], ['2008-03', '57'], ['2008-04', '59'], ['2008-05', '58'], ['2008-06', '57'], ['2008-07', '59'], ['2008-08', '57'], ['2008-09', '54'], ['2008-10', '51'], ['2008-11', '44'], ['2008-12', '46'], ['2009-01', '51'], ['2009-02', '50'], ['2009-03', '50'], ['2009-04', '49'], ['2009-05', '45'], ['2009-06', '47'], ['2009-07', '49'], ['2009-08', '49'], ['2009-09', '44'], ['2009-10', '42'], ['2009-11', '38'], ['2009-12', '39'], ['2010-01', '42'], ['2010-02', '42'], ['2010-03', '43'], ['2010-04', '41'], ['2010-05', '39'], ['2010-06', '42'], ['2010-07', '45'], ['2010-08', '46'], ['2010-09', '42'], ['2010-10', '40'], ['2010-11', '37'], ['2010-12', '37'], ['2011-01', '51'], ['2011-02', '51'], ['2011-03', '51'], ['2011-04', '51'], ['2011-05', '49'], ['2011-06', '53'], ['2011-07', '53'], ['2011-08', '55'], ['2011-09', '50'], ['2011-10', '47'], ['2011-11', '45'], ['2011-12', '45'], ['2012-01', '50'], ['2012-02', '51'], ['2012-03', '49'], ['2012-04', '48'], ['2012-05', '50'], ['2012-06', '50'], ['2012-07', '52'], ['2012-08', '53'], ['2012-09', '49'], ['2012-10', '48'], ['2012-11', '45'], ['2012-12', '43'], ['2013-01', '49'], ['2013-02', '49'], ['2013-03', '48'], ['2013-04', '49'], ['2013-05', '48'], ['2013-06', '49'], ['2013-07', '52'], ['2013-08', '52'], ['2013-09', '48'], ['2013-10', '48'], ['2013-11', '43'], ['2013-12', '42'], ['2014-01', '47'], ['2014-02', '49'], ['2014-03', '49'], ['2014-04', '48'], ['2014-05', '45'], ['2014-06', '45'], ['2014-07', '46'], ['2014-08', '46'], ['2014-09', '44'], ['2014-10', '42'], ['2014-11', '38'], ['2014-12', '39'], ['2015-01', '41'], ['2015-02', '43'], ['2015-03', '40'], ['2015-04', '41'], ['2015-05', '38'], ['2015-06', '42'], ['2015-07', '42'], ['2015-08', '42'], ['2015-09', '39'], ['2015-10', '37'], ['2015-11', '35'], ['2015-12', '36'], ['2016-01', '48'], ['2016-02', '53'], ['2016-03', '50'], ['2016-04', '51'], ['2016-05', '52'], ['2016-06', '51'], ['2016-07', '54'], ['2016-08', '56'], ['2016-09', '53'], ['2016-10', '52'], ['2016-11', '48'], ['2016-12', '50'], ['2017-01', '54'], ['2017-02', '55'], ['2017-03', '57'], ['2017-04', '54'], ['2017-05', '57'], ['2017-06', '60'], ['2017-07', '58'], ['2017-08', '62']]\n",
      "[90.0, 86.0, 84.0, 87.0, 86.0, 93.0, 94.0, 100.0, 87.0, 79.0, 73.0, 78.0, 85.0, 83.0, 82.0, 77.0, 81.0, 86.0, 85.0, 90.0, 83.0, 73.0, 69.0, 70.0, 79.0, 74.0, 75.0, 76.0, 77.0, 80.0, 84.0, 85.0, 74.0, 71.0, 65.0, 66.0, 74.0, 72.0, 72.0, 72.0, 70.0, 70.0, 73.0, 72.0, 61.0, 63.0, 55.0, 54.0, 62.0, 59.0, 57.0, 59.0, 58.0, 57.0, 59.0, 57.0, 54.0, 51.0, 44.0, 46.0, 51.0, 50.0, 50.0, 49.0, 45.0, 47.0, 49.0, 49.0, 44.0, 42.0, 38.0, 39.0, 42.0, 42.0, 43.0, 41.0, 39.0, 42.0, 45.0, 46.0, 42.0, 40.0, 37.0, 37.0, 51.0, 51.0, 51.0, 51.0, 49.0, 53.0, 53.0, 55.0, 50.0, 47.0, 45.0, 45.0, 50.0, 51.0, 49.0, 48.0, 50.0, 50.0, 52.0, 53.0, 49.0, 48.0, 45.0, 43.0, 49.0, 49.0, 48.0, 49.0, 48.0, 49.0, 52.0, 52.0, 48.0, 48.0, 43.0, 42.0, 47.0, 49.0, 49.0, 48.0, 45.0, 45.0, 46.0, 46.0, 44.0, 42.0, 38.0, 39.0, 41.0, 43.0, 40.0, 41.0, 38.0, 42.0, 42.0, 42.0, 39.0, 37.0, 35.0, 36.0, 48.0, 53.0, 50.0, 51.0, 52.0, 51.0, 54.0, 56.0, 53.0, 52.0, 48.0, 50.0, 54.0, 55.0, 57.0, 54.0, 57.0, 60.0, 58.0, 62.0]\n"
     ]
    }
   ],
   "source": [
    "# doing same for autoinsurance - data downloaded from google trends\n",
    "with open(\"Auto Insurance 17 08 2017.csv\", 'r') as f:\n",
    "  reader = csv.reader(f)\n",
    "  AutoIns = list(reader)\n",
    "print(AutoIns)\n",
    "AI=[]\n",
    "for i in range(3,len(AutoIns)):\n",
    "    AI=AI+[float(AutoIns[i][1])]\n",
    "print(AI) # note this includes august 2017!\n",
    "AI=AI[0:len(AI)-1] # to get til July 2017\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     const        AI        TS  lagged sales 1  lagged sales 12\n",
      "0      1.0  4.499810  4.553877             NaN              NaN\n",
      "1      1.0  4.454347  4.532599       11.021020              NaN\n",
      "2      1.0  4.430817  4.543295       11.085675              NaN\n",
      "3      1.0  4.465908  4.553877       11.272915              NaN\n",
      "4      1.0  4.454347  4.532599       11.201661              NaN\n",
      "5      1.0  4.532599  4.543295       11.257917              NaN\n",
      "6      1.0  4.543295  4.595120       11.229966              NaN\n",
      "7      1.0  4.605170  4.605170       11.275138              NaN\n",
      "8      1.0  4.465908  4.574711       11.243437              NaN\n",
      "9      1.0  4.369448  4.454347       11.206129              NaN\n",
      "10     1.0  4.290459  4.394449       11.138159              NaN\n",
      "11     1.0  4.356709  4.442651       11.085537              NaN\n",
      "12     1.0  4.442651  4.488636       11.191011        11.021020\n",
      "13     1.0  4.418841  4.510860       11.033857        11.085675\n",
      "14     1.0  4.406719  4.488636       11.111627        11.272915\n",
      "15     1.0  4.343805  4.418841       11.297366        11.201661\n",
      "16     1.0  4.394449  4.430817       11.260211        11.257917\n",
      "17     1.0  4.454347  4.465908       11.268648        11.229966\n",
      "18     1.0  4.442651  4.532599       11.354844        11.275138\n",
      "19     1.0  4.499810  4.532599       11.381791        11.243437\n",
      "20     1.0  4.418841  4.356709       11.304917        11.206129\n",
      "21     1.0  4.290459  4.330733       11.147815        11.138159\n",
      "22     1.0  4.234107  4.304065       11.060274        11.085537\n",
      "23     1.0  4.248495  4.330733       11.079755        11.191011\n",
      "24     1.0  4.369448  4.369448       11.165663        11.033857\n",
      "25     1.0  4.304065  4.369448       11.093417        11.111627\n",
      "26     1.0  4.317488  4.369448       11.119172        11.297366\n",
      "27     1.0  4.330733  4.394449       11.323628        11.260211\n",
      "28     1.0  4.343805  4.343805       11.240854        11.268648\n",
      "29     1.0  4.382027  4.406719       11.308567        11.354844\n",
      "..     ...       ...       ...             ...              ...\n",
      "133    1.0  3.761200  4.189655       11.270459        11.224430\n",
      "134    1.0  3.688879  4.127134       11.284719        11.413182\n",
      "135    1.0  3.713572  4.110874       11.477733        11.384910\n",
      "136    1.0  3.637586  4.077537       11.448526        11.443039\n",
      "137    1.0  3.737670  4.110874       11.488602        11.370324\n",
      "138    1.0  3.737670  4.158883       11.459303        11.421665\n",
      "139    1.0  3.737670  4.158883       11.491589        11.436250\n",
      "140    1.0  3.663562  4.077537       11.492866        11.330804\n",
      "141    1.0  3.610918  4.043051       11.417581        11.342434\n",
      "142    1.0  3.555348  4.007333       11.412353        11.276671\n",
      "143    1.0  3.583519  4.043051       11.339833        11.368547\n",
      "144    1.0  3.871201  4.394449       11.443779        11.270459\n",
      "145    1.0  3.970292  4.465908       11.289944        11.284719\n",
      "146    1.0  3.912023  4.430817       11.398827        11.477733\n",
      "147    1.0  3.931826  4.406719       11.513445        11.448526\n",
      "148    1.0  3.951244  4.406719       11.478438        11.488602\n",
      "149    1.0  3.931826  4.442651       11.490721        11.459303\n",
      "150    1.0  3.988984  4.488636       11.486961        11.491589\n",
      "151    1.0  4.025352  4.488636       11.500448        11.492866\n",
      "152    1.0  3.970292  4.465908       11.544502        11.417581\n",
      "153    1.0  3.951244  4.418841       11.459219        11.412353\n",
      "154    1.0  3.871201  4.356709       11.431498        11.339833\n",
      "155    1.0  3.912023  4.442651       11.403958        11.443779\n",
      "156    1.0  3.988984  4.442651       11.517057        11.289944\n",
      "157    1.0  4.007333  4.477337       11.346375        11.398827\n",
      "158    1.0  4.043051  4.488636       11.396504        11.513445\n",
      "159    1.0  3.988984  4.454347       11.570685        11.478438\n",
      "160    1.0  4.043051  4.465908       11.484269        11.490721\n",
      "161    1.0  4.094345  4.532599       11.568218        11.486961\n",
      "162    1.0  4.060443  4.564348       11.542008        11.500448\n",
      "\n",
      "[163 rows x 5 columns]\n",
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  sales   R-squared:                       0.867\n",
      "Model:                            OLS   Adj. R-squared:                  0.864\n",
      "Method:                 Least Squares   F-statistic:                     238.4\n",
      "Date:                Mon, 21 Aug 2017   Prob (F-statistic):           6.28e-63\n",
      "Time:                        09:37:02   Log-Likelihood:                 195.88\n",
      "No. Observations:                 151   AIC:                            -381.8\n",
      "Df Residuals:                     146   BIC:                            -366.7\n",
      "Df Model:                           4                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "===================================================================================\n",
      "                      coef    std err          t      P>|t|      [0.025      0.975]\n",
      "-----------------------------------------------------------------------------------\n",
      "const               0.4856      0.423      1.147      0.253      -0.351       1.322\n",
      "AI                 -0.1899      0.044     -4.276      0.000      -0.278      -0.102\n",
      "TS                  0.3360      0.078      4.293      0.000       0.181       0.491\n",
      "lagged sales 1      0.4955      0.051      9.673      0.000       0.394       0.597\n",
      "lagged sales 12     0.4027      0.052      7.784      0.000       0.300       0.505\n",
      "==============================================================================\n",
      "Omnibus:                        8.966   Durbin-Watson:                   1.589\n",
      "Prob(Omnibus):                  0.011   Jarque-Bera (JB):               10.731\n",
      "Skew:                          -0.403   Prob(JB):                      0.00467\n",
      "Kurtosis:                       4.027   Cond. No.                     1.32e+03\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
      "[2] The condition number is large, 1.32e+03. This might indicate that there are\n",
      "strong multicollinearity or other numerical problems.\n"
     ]
    }
   ],
   "source": [
    "# let's now run a regression to see whether the google search terms contribute to the predictive power\n",
    "#first we run a regression with all the data we have\n",
    "import statsmodels.api as sm\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "AI=np.log(AI) # the paper does it with logs so we transform everything in logs\n",
    "TS=np.log(TS)\n",
    "sales=np.log(sales)\n",
    "x1 = pd.Series(AI, name='AI') \n",
    "x2=pd.Series(TS, name='TS')\n",
    "x=pd.concat([x1, x2], axis=1)\n",
    "x=sm.add_constant(x)\n",
    "y = pd.Series(sales, name='sales') \n",
    "lag=y.shift(+1) # we want to regress on lagged dependent variable (Autoregressive model) - in python +1 is lagged one period\n",
    "x=pd.concat([x, lag], axis=1)\n",
    "x = x.rename(columns={'sales': 'lagged sales 1'})\n",
    "lag=y.shift(+12) # lag one year, i.e. 12 months\n",
    "x=pd.concat([x, lag], axis=1)\n",
    "x = x.rename(columns={'sales': 'lagged sales 12'}) # to add names to the variables\n",
    "\n",
    "print(x)\n",
    "model = sm.OLS(y, x, missing='drop')\n",
    "results = model.fit()\n",
    "print(results.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  sales   R-squared:                       0.849\n",
      "Model:                            OLS   Adj. R-squared:                  0.847\n",
      "Method:                 Least Squares   F-statistic:                     416.0\n",
      "Date:                Mon, 21 Aug 2017   Prob (F-statistic):           1.77e-61\n",
      "Time:                        09:37:02   Log-Likelihood:                 186.16\n",
      "No. Observations:                 151   AIC:                            -366.3\n",
      "Df Residuals:                     148   BIC:                            -357.3\n",
      "Df Model:                           2                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "===================================================================================\n",
      "                      coef    std err          t      P>|t|      [0.025      0.975]\n",
      "-----------------------------------------------------------------------------------\n",
      "const              -0.3096      0.407     -0.761      0.448      -1.113       0.494\n",
      "lagged sales 1      0.5955      0.049     12.225      0.000       0.499       0.692\n",
      "lagged sales 12     0.4331      0.053      8.169      0.000       0.328       0.538\n",
      "==============================================================================\n",
      "Omnibus:                        7.703   Durbin-Watson:                   1.611\n",
      "Prob(Omnibus):                  0.021   Jarque-Bera (JB):                9.048\n",
      "Skew:                          -0.354   Prob(JB):                       0.0108\n",
      "Kurtosis:                       3.968   Cond. No.                     1.12e+03\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
      "[2] The condition number is large, 1.12e+03. This might indicate that there are\n",
      "strong multicollinearity or other numerical problems.\n"
     ]
    }
   ],
   "source": [
    "# compare the R2 when search terms are excluded\n",
    "lagsonlyx=x[[\"const\", \"lagged sales 1\",\"lagged sales 12\"]] # this is to select the lags only from the x dataframe\n",
    "model = sm.OLS(y, lagsonlyx, missing='drop')\n",
    "results = model.fit()\n",
    "print(results.summary())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  sales   R-squared:                       0.721\n",
      "Model:                            OLS   Adj. R-squared:                  0.714\n",
      "Method:                 Least Squares   F-statistic:                     98.33\n",
      "Date:                Mon, 21 Aug 2017   Prob (F-statistic):           8.27e-22\n",
      "Time:                        09:37:02   Log-Likelihood:                 88.759\n",
      "No. Observations:                  79   AIC:                            -171.5\n",
      "Df Residuals:                      76   BIC:                            -164.4\n",
      "Df Model:                           2                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "===================================================================================\n",
      "                      coef    std err          t      P>|t|      [0.025      0.975]\n",
      "-----------------------------------------------------------------------------------\n",
      "const               0.6677      0.760      0.879      0.382      -0.845       2.180\n",
      "lagged sales 1      0.6511      0.073      8.939      0.000       0.506       0.796\n",
      "lagged sales 12     0.2885      0.072      3.991      0.000       0.145       0.432\n",
      "==============================================================================\n",
      "Omnibus:                        2.204   Durbin-Watson:                   1.649\n",
      "Prob(Omnibus):                  0.332   Jarque-Bera (JB):                1.702\n",
      "Skew:                          -0.064   Prob(JB):                        0.427\n",
      "Kurtosis:                       3.708   Cond. No.                     1.33e+03\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
      "[2] The condition number is large, 1.33e+03. This might indicate that there are\n",
      "strong multicollinearity or other numerical problems.\n"
     ]
    }
   ],
   "source": [
    "# in the paper the dataset only goes til july 2011 - hence we cut our longer dataset back to jan 2014 -july 2011\n",
    "shortx=x[:91]\n",
    "# to see the impact on the predictive power we want to see how the R2 compares when we add the search terms to the AR model only\n",
    "lagsonlyx=shortx[[\"const\", \"lagged sales 1\",\"lagged sales 12\"]] # this is to select the lags only from the x dataframe\n",
    "\n",
    "shorty=y[:91]\n",
    "model = sm.OLS(shorty, lagsonlyx, missing='drop')\n",
    "results = model.fit()\n",
    "print(results.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            OLS Regression Results                            \n",
      "==============================================================================\n",
      "Dep. Variable:                  sales   R-squared:                       0.794\n",
      "Model:                            OLS   Adj. R-squared:                  0.783\n",
      "Method:                 Least Squares   F-statistic:                     71.37\n",
      "Date:                Mon, 21 Aug 2017   Prob (F-statistic):           1.22e-24\n",
      "Time:                        09:37:02   Log-Likelihood:                 100.73\n",
      "No. Observations:                  79   AIC:                            -191.5\n",
      "Df Residuals:                      74   BIC:                            -179.6\n",
      "Df Model:                           4                                         \n",
      "Covariance Type:            nonrobust                                         \n",
      "===================================================================================\n",
      "                      coef    std err          t      P>|t|      [0.025      0.975]\n",
      "-----------------------------------------------------------------------------------\n",
      "const              -0.6818      1.037     -0.657      0.513      -2.748       1.385\n",
      "AI                 -0.5098      0.134     -3.799      0.000      -0.777      -0.242\n",
      "TS                  0.9500      0.198      4.792      0.000       0.555       1.345\n",
      "lagged sales 1      0.5298      0.077      6.886      0.000       0.376       0.683\n",
      "lagged sales 12     0.3565      0.075      4.783      0.000       0.208       0.505\n",
      "==============================================================================\n",
      "Omnibus:                        3.674   Durbin-Watson:                   1.718\n",
      "Prob(Omnibus):                  0.159   Jarque-Bera (JB):                3.933\n",
      "Skew:                           0.093   Prob(JB):                        0.140\n",
      "Kurtosis:                       4.077   Cond. No.                     2.24e+03\n",
      "==============================================================================\n",
      "\n",
      "Warnings:\n",
      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
      "[2] The condition number is large, 2.24e+03. This might indicate that there are\n",
      "strong multicollinearity or other numerical problems.\n"
     ]
    }
   ],
   "source": [
    "# now lets see what adding the search terms adds to R2\n",
    "shortx=x[:91]\n",
    "shorty=y[:91]\n",
    "model = sm.OLS(shorty, shortx, missing='drop')\n",
    "results = model.fit()\n",
    "print(results.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
